Regression modeling
with Guediawaye Census data
#"hh_size",
predictors = c("menage","Aucun","Primair","Moyen","Secondr","fertlty","Ltrcy_F","nbr_cm_h","nbr_cm_f","pct_cm_f")
outcome = "MPI_men"
Inspecting the
outcome variable (MPI) with visualization
mhv_map <- ggplot(MPI_data_dr_2013, aes(fill = MPI_men)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI ")
mhv_histogram <- ggplot(MPI_data_dr_2013, aes(x = MPI_men)) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) +
labs(x = "MPI")
mhv_map + mhv_histogram + labs(title = "MPI value charts for Guediawaye Census 2013")

MPI_data_dr %>%
ggplot(aes(fill = MPI_men)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI ")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggplot(MPI_data_dr, aes(x = MPI_men)) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) +
labs(x = "MPI")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

mhv_map_log <- ggplot(MPI_data_dr_2013, aes(fill = log(MPI_men))) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI\nvalue (log)")
mhv_histogram_log <- ggplot(MPI_data_dr_2013, aes(x = log(MPI_men))) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous() +
labs(x = "MPI (log)")
mhv_map_log + mhv_histogram_log + labs(title = "Logged MPI value charts for Guediawaye Census 2013")

MPI_data_dr %>%
ggplot(aes(fill = log(MPI_men))) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI\nvalue (log)")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

A first regression
model
library(sf)
library(units)
predictors = c("menage","Primair","Moyen","Secondr","Ltrcy_F","pct_cm_f","hh_size","Ltrcy_A","Ltrcy_W","Ltrcy_M","fertlty")
outcome = "MPI_men"
MPI_data_dr_2013_for_model<- MPI_data_dr_2013 %>%
dplyr::select(MPI_men,predictors) %>%
mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>%
dplyr::select(-hh_size)
##
MPI_data_dr_2002_for_model<- MPI_data_dr_2002 %>%
dplyr::select(MPI_men,predictors) %>%
mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>%
dplyr::select(-hh_size)
formula <- "log(MPI_men) ~ menage + Primair + Moyen + Secondr + Ltrcy_F + pct_cm_f + pop_density + Ltrcy_A + Ltrcy_W + Ltrcy_M + fertlty"
model_2013 <- lm(formula = formula, data = MPI_data_dr_2013_for_model)
summary(model_2013)
##
## Call:
## lm(formula = formula, data = MPI_data_dr_2013_for_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9660 -0.1522 0.0151 0.1668 0.9155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.206e+00 7.881e-02 -27.993 < 2e-16 ***
## menage 8.920e-03 8.090e-04 11.026 < 2e-16 ***
## Primair 4.489e-05 3.742e-04 0.120 0.904572
## Moyen -4.062e-03 8.660e-04 -4.690 3.69e-06 ***
## Secondr -5.420e-03 1.042e-03 -5.202 3.09e-07 ***
## Ltrcy_F -1.080e-03 2.616e-04 -4.130 4.37e-05 ***
## pct_cm_f -2.412e-01 1.605e-01 -1.502 0.133743
## pop_density 3.315e-06 8.562e-07 3.872 0.000125 ***
## Ltrcy_A 8.242e-04 3.680e-04 2.240 0.025623 *
## Ltrcy_W 3.519e-04 3.775e-04 0.932 0.351822
## Ltrcy_M -5.074e-03 1.121e-02 -0.453 0.651031
## fertlty 1.192e-04 7.926e-05 1.504 0.133427
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2763 on 423 degrees of freedom
## Multiple R-squared: 0.5892, Adjusted R-squared: 0.5785
## F-statistic: 55.16 on 11 and 423 DF, p-value: < 2.2e-16
###
model_2002 <- lm(formula = formula, data = MPI_data_dr_2002_for_model)
summary(model_2002)
##
## Call:
## lm(formula = formula, data = MPI_data_dr_2002_for_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.93801 -0.14626 0.02407 0.15328 0.92920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.502e+00 9.521e-02 -26.277 < 2e-16 ***
## menage 5.094e-03 6.425e-04 7.929 6.84e-14 ***
## Primair -5.197e-04 9.546e-04 -0.544 0.586581
## Moyen -4.774e-03 1.305e-03 -3.659 0.000307 ***
## Secondr -3.697e-03 1.733e-03 -2.133 0.033870 *
## Ltrcy_F 6.215e-04 1.007e-03 0.617 0.537825
## pct_cm_f 9.499e-02 2.789e-01 0.341 0.733699
## pop_density -7.412e-07 8.743e-07 -0.848 0.397346
## Ltrcy_A 3.452e-04 9.737e-05 3.545 0.000467 ***
## Ltrcy_W 2.135e-03 7.068e-04 3.021 0.002772 **
## Ltrcy_M -1.789e-02 1.212e-02 -1.477 0.140927
## fertlty 4.018e-05 5.000e-05 0.804 0.422424
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2598 on 256 degrees of freedom
## Multiple R-squared: 0.6361, Adjusted R-squared: 0.6205
## F-statistic: 40.69 on 11 and 256 DF, p-value: < 2.2e-16
library(corrr)
dfw_estimates_2013 <- MPI_data_dr_2013_for_model%>%
select(-MPI_men) %>%
st_drop_geometry()
correlations <- correlate(dfw_estimates_2013, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2013")

##
dfw_estimates_2002 <- MPI_data_dr_2002_for_model%>%
select(-MPI_men) %>%
st_drop_geometry()
correlations <- correlate(dfw_estimates_2002, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2002")

library(car)
vif(model_2013)
## menage Primair Moyen Secondr Ltrcy_F pct_cm_f
## 3.871359 6.364877 8.106134 6.016109 7.975205 1.078805
## pop_density Ltrcy_A Ltrcy_W Ltrcy_M fertlty
## 1.310951 1.753154 1.482194 1.495710 5.367791
vif(model_2002)
## menage Primair Moyen Secondr Ltrcy_F pct_cm_f
## 3.245263 296.905235 62.394454 23.511511 676.243716 1.192264
## pop_density Ltrcy_A Ltrcy_W Ltrcy_M fertlty
## 5.093219 4.316810 2.813059 2.154716 12.816729
Dimension reduction
with principal components analysis
pca_2013 <- prcomp(
formula = ~.,
data = dfw_estimates_2013,
scale. = TRUE,
center = TRUE
)
summary(pca_2013)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.1830 1.2702 1.1990 1.0093 0.86570 0.70864 0.63550
## Proportion of Variance 0.4332 0.1467 0.1307 0.0926 0.06813 0.04565 0.03671
## Cumulative Proportion 0.4332 0.5799 0.7106 0.8032 0.87133 0.91698 0.95369
## PC8 PC9 PC10 PC11
## Standard deviation 0.46543 0.35450 0.28978 0.28829
## Proportion of Variance 0.01969 0.01142 0.00763 0.00756
## Cumulative Proportion 0.97339 0.98481 0.99244 1.00000
##
pca_2002 <- prcomp(
formula = ~.,
data = dfw_estimates_2002,
scale. = TRUE,
center = TRUE
)
summary(pca_2002)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.484 1.2669 1.1310 0.89828 0.6902 0.51195 0.42413
## Proportion of Variance 0.561 0.1459 0.1163 0.07335 0.0433 0.02383 0.01635
## Cumulative Proportion 0.561 0.7069 0.8232 0.89657 0.9399 0.96370 0.98005
## PC8 PC9 PC10 PC11
## Standard deviation 0.34446 0.24919 0.19411 0.03123
## Proportion of Variance 0.01079 0.00565 0.00343 0.00009
## Cumulative Proportion 0.99084 0.99649 0.99991 1.00000
pca_2013_tibble <- pca_2013$rotation %>%
as_tibble(rownames = "predictor")
pca_2013_tibble
## # A tibble: 11 × 12
## predictor PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 menage 0.411 0.0273 0.0207 -0.00165 -0.116 0.171 -0.100 -0.860
## 2 Primair 0.391 0.00136 0.292 -0.134 -0.0931 0.278 -0.182 0.321
## 3 Moyen 0.423 0.0591 -0.169 -0.0299 -0.123 -0.105 0.0880 0.292
## 4 Secondr 0.359 0.0776 -0.404 0.0669 -0.0637 -0.350 0.241 -0.0562
## 5 Ltrcy_F 0.419 -0.0249 -0.140 0.0607 0.118 -0.310 0.179 0.148
## 6 pct_cm_f -0.0501 -0.160 0.0499 -0.944 0.0273 -0.226 0.135 -0.0901
## 7 Ltrcy_A 0.187 -0.242 0.385 0.127 0.804 -0.193 -0.00979 -0.0681
## 8 Ltrcy_W 0.00824 -0.670 -0.121 0.0919 -0.249 -0.329 -0.598 0.0156
## 9 Ltrcy_M 0.00631 -0.670 -0.169 0.0737 -0.0179 0.453 0.556 0.0105
## 10 fertlty 0.397 -0.0175 0.210 -0.142 -0.0923 0.369 -0.195 0.177
## 11 pop_densi… 0.00230 -0.0771 0.682 0.178 -0.477 -0.355 0.370 -0.0462
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
###
pca_2002_tibble <- pca_2002$rotation %>%
as_tibble(rownames = "predictor")
pca_2002_tibble
## # A tibble: 11 × 12
## predictor PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 menage 0.317 -0.151 0.0760 1.53e-1 0.743 -0.386 0.0986 -0.334
## 2 Primair 0.386 -0.0977 -0.0903 1.77e-2 0.0916 0.125 0.0829 0.551
## 3 Moyen 0.366 0.0740 0.250 2.23e-1 -0.182 0.0868 -0.0602 0.0971
## 4 Secondr 0.259 0.199 0.519 3.79e-1 -0.230 0.162 -0.0422 -0.401
## 5 Ltrcy_F 0.394 -0.0113 0.0838 1.19e-1 -0.0357 0.120 0.0212 0.326
## 6 pct_cm_f -0.0469 -0.168 -0.612 7.50e-1 -0.0961 0.0674 -0.0988 -0.0935
## 7 Ltrcy_A 0.342 -0.0715 -0.174 -2.34e-1 -0.311 -0.481 -0.664 -0.115
## 8 Ltrcy_W 0.190 0.586 -0.273 -7.74e-6 -0.233 -0.446 0.540 -0.00999
## 9 Ltrcy_M 0.0267 0.707 -0.206 -4.56e-2 0.388 0.348 -0.425 -0.0105
## 10 fertlty 0.373 -0.141 -0.163 -1.40e-1 0.140 0.135 0.0543 0.0967
## 11 pop_density 0.316 -0.157 -0.315 -3.60e-1 -0.148 0.462 0.229 -0.526
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
pca_2013_tibble %>%
select(predictor:PC5) %>%
pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL, x = "Value", title = " Loadings for first five principal components for Guediawaye Census 2013") +
theme_minimal()

pca_2002_tibble %>%
select(predictor:PC5) %>%
pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL, x = "Value", title = " Loadings for first five principal components for Guediawaye Census 2002") +
theme_minimal()

components_2013 <- predict(pca_2013, dfw_estimates_2013)
dfw_pca_2013 <- MPI_data_dr_2013_for_model%>%
select(MPI_men) %>%
cbind(components_2013)
ggplot(dfw_pca_2013, aes(fill = PC1)) +
geom_sf(color = NA) +
labs(title = "Map of principal component 1 for Guediawaye Census 2013") +
theme_void() +
scale_fill_viridis_c()

components_2002 <- predict(pca_2002, dfw_estimates_2002)
dfw_pca_2002 <- MPI_data_dr_2002_for_model%>%
select(MPI_men) %>%
cbind(components_2002)
ggplot(dfw_pca_2002, aes(fill = PC1)) +
geom_sf(color = NA) +
labs(title = "Map of principal component 1 for Guediawaye Census 2002") +
theme_void() +
scale_fill_viridis_c()

dfw_pca_2002$RGPH<-"Census 2002"
dfw_pca_2013$RGPH<-"Census 2013"
dfw_pca <- dfw_pca_2002 %>%
plyr::rbind.fill(dfw_pca_2013) %>%
st_as_sf()
dfw_pca %>%
ggplot(aes(fill = PC1)) +
# Adding spatial features to the plot
geom_sf(color = NA) +
scale_fill_viridis_c()+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

dfw_pca %>%
ggplot(aes(fill = PC2)) +
# Adding spatial features to the plot
geom_sf(color = NA) +
scale_fill_viridis_c()+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

pca_2013_formula <- paste0("log(MPI_men) ~ ",
paste0('PC', 1:6, collapse = ' + '))
pca_2013_model <- lm(formula = pca_2013_formula, data = dfw_pca_2013)
summary(pca_2013_model)
##
## Call:
## lm(formula = pca_2013_formula, data = dfw_pca_2013)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24987 -0.18277 0.02653 0.20470 1.04081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.198765 0.015293 -143.780 < 2e-16 ***
## PC1 -0.053705 0.007013 -7.657 1.28e-13 ***
## PC2 -0.028598 0.012053 -2.373 0.0181 *
## PC3 0.190360 0.012769 14.908 < 2e-16 ***
## PC4 0.012829 0.015169 0.846 0.3982
## PC5 -0.024572 0.017685 -1.389 0.1654
## PC6 0.161101 0.021605 7.457 4.97e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.319 on 428 degrees of freedom
## Multiple R-squared: 0.4461, Adjusted R-squared: 0.4384
## F-statistic: 57.46 on 6 and 428 DF, p-value: < 2.2e-16
###
pca_2002_formula <- paste0("log(MPI_men) ~ ",
paste0('PC', 1:6, collapse = ' + '))
pca_2002_model <- lm(formula = pca_2002_formula, data = dfw_pca_2002)
summary(pca_2002_model)
##
## Call:
## lm(formula = pca_2002_formula, data = dfw_pca_2002)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85562 -0.14210 0.02181 0.16672 0.90081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.372798 0.016131 -147.095 < 2e-16 ***
## PC1 -0.034208 0.006506 -5.258 3.03e-07 ***
## PC2 -0.077519 0.012757 -6.077 4.33e-09 ***
## PC3 -0.184749 0.014289 -12.929 < 2e-16 ***
## PC4 -0.118827 0.017991 -6.605 2.22e-10 ***
## PC5 0.209272 0.023416 8.937 < 2e-16 ***
## PC6 -0.254087 0.031568 -8.049 2.96e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2641 on 261 degrees of freedom
## Multiple R-squared: 0.6167, Adjusted R-squared: 0.6079
## F-statistic: 70 on 6 and 261 DF, p-value: < 2.2e-16
Spatial
regression
MPI_data_dr_2013_for_model$residuals <- residuals(model_2013)
ggplot(MPI_data_dr_2013_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
labs(title = "Distribution of model residuals for Guediawaye Census 2013") +
theme_minimal()

MPI_data_dr_2002_for_model$residuals <- residuals(model_2002)
ggplot(MPI_data_dr_2002_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
labs(title = "Distribution of model residuals for Guediawaye Census 2002") +
theme_minimal()

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
dplyr::mutate(RGPH = "Census 2013"))
ggplot(MPI_data_dr_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

library(spdep)
wts_2013 <- MPI_data_dr_2013_for_model %>%
poly2nb() %>%
nb2listw()
moran.test(MPI_data_dr_2013_for_model$residuals, wts_2013)
##
## Moran I test under randomisation
##
## data: MPI_data_dr_2013_for_model$residuals
## weights: wts_2013
##
## Moran I statistic standard deviate = 4.7117, p-value = 1.228e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1325997182 -0.0023041475 0.0008197714
wts_2002 <- MPI_data_dr_2002_for_model %>%
poly2nb() %>%
nb2listw()
moran.test(MPI_data_dr_2002_for_model$residuals, wts_2002)
##
## Moran I test under randomisation
##
## data: MPI_data_dr_2002_for_model$residuals
## weights: wts_2002
##
## Moran I statistic standard deviate = 2.1911, p-value = 0.01422
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.077202122 -0.003745318 0.001364795
MPI_data_dr_2013_for_model$lagged_residuals <- lag.listw(wts_2013, MPI_data_dr_2013_for_model$residuals)
ggplot(MPI_data_dr_2013_for_model, aes(x = residuals, y = lagged_residuals)) +
theme_minimal() +
labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2013") +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")

MPI_data_dr_2002_for_model$lagged_residuals <- lag.listw(wts_2002, MPI_data_dr_2002_for_model$residuals)
ggplot(MPI_data_dr_2002_for_model, aes(x = residuals, y = lagged_residuals)) +
theme_minimal() +
labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2002") +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
dplyr::mutate(RGPH = "Census 2013"))
ggplot(MPI_data_dr_for_model, aes(x = residuals, y = lagged_residuals)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

Geographically weighted
regression
Choosing a bandwidth
for GWR
library(GWmodel)
library(sf)
dfw_data_sp_2013 <- MPI_data_dr_2013_for_model%>%
as_Spatial()
bw <- bw.gwr(
formula = formula,
data = dfw_data_sp_2013,
kernel = "bisquare",
adaptive = TRUE
)
## Adaptive bandwidth: 276 CV score: 33.03686
## Adaptive bandwidth: 179 CV score: 32.93194
## Adaptive bandwidth: 117 CV score: 34.2892
## Adaptive bandwidth: 215 CV score: 32.81678
## Adaptive bandwidth: 240 CV score: 32.88297
## Adaptive bandwidth: 202 CV score: 32.80238
## Adaptive bandwidth: 191 CV score: 32.84514
## Adaptive bandwidth: 205 CV score: 32.78626
## Adaptive bandwidth: 211 CV score: 32.81625
## Adaptive bandwidth: 205 CV score: 32.78626
###
dfw_data_sp_2002 <- MPI_data_dr_2002_for_model%>%
as_Spatial()
bw <- bw.gwr(
formula = formula,
data = dfw_data_sp_2002,
kernel = "bisquare",
adaptive = TRUE
)
## Adaptive bandwidth: 173 CV score: 30.77821
## Adaptive bandwidth: 115 CV score: 22.62838
## Adaptive bandwidth: 78 CV score: 18.01934
## Adaptive bandwidth: 56 CV score: 20.35081
## Adaptive bandwidth: 92 CV score: 18.83631
## Adaptive bandwidth: 69 CV score: 18.26553
## Adaptive bandwidth: 83 CV score: 18.02488
## Adaptive bandwidth: 74 CV score: 18.01254
## Adaptive bandwidth: 72 CV score: 18.08195
## Adaptive bandwidth: 75 CV score: 17.91883
## Adaptive bandwidth: 76 CV score: 17.96413
## Adaptive bandwidth: 74 CV score: 18.01254
## Adaptive bandwidth: 75 CV score: 17.91883
gw_model_2013 <- gwr.basic(
formula = formula,
data = dfw_data_sp_2013,
bw = bw,
kernel = "bisquare",
adaptive = TRUE
)
###
gw_model_2002 <- gwr.basic(
formula = formula,
data = dfw_data_sp_2002,
bw = bw,
kernel = "bisquare",
adaptive = TRUE
)
names(gw_model_2013)
## [1] "GW.arguments" "GW.diagnostic" "lm" "SDF"
## [5] "timings" "this.call" "Ftests"
##
names(gw_model_2002)
## [1] "GW.arguments" "GW.diagnostic" "lm" "SDF"
## [5] "timings" "this.call" "Ftests"
gw_model_2013_results <- gw_model_2013$SDF %>%
st_as_sf()
names(gw_model_2013_results)
## [1] "Intercept" "menage" "Primair" "Moyen"
## [5] "Secondr" "Ltrcy_F" "pct_cm_f" "pop_density"
## [9] "Ltrcy_A" "Ltrcy_W" "Ltrcy_M" "fertlty"
## [13] "y" "yhat" "residual" "CV_Score"
## [17] "Stud_residual" "Intercept_SE" "menage_SE" "Primair_SE"
## [21] "Moyen_SE" "Secondr_SE" "Ltrcy_F_SE" "pct_cm_f_SE"
## [25] "pop_density_SE" "Ltrcy_A_SE" "Ltrcy_W_SE" "Ltrcy_M_SE"
## [29] "fertlty_SE" "Intercept_TV" "menage_TV" "Primair_TV"
## [33] "Moyen_TV" "Secondr_TV" "Ltrcy_F_TV" "pct_cm_f_TV"
## [37] "pop_density_TV" "Ltrcy_A_TV" "Ltrcy_W_TV" "Ltrcy_M_TV"
## [41] "fertlty_TV" "Local_R2" "geometry"
###
gw_model_2002_results <- gw_model_2002$SDF %>%
st_as_sf()
names(gw_model_2002_results)
## [1] "Intercept" "menage" "Primair" "Moyen"
## [5] "Secondr" "Ltrcy_F" "pct_cm_f" "pop_density"
## [9] "Ltrcy_A" "Ltrcy_W" "Ltrcy_M" "fertlty"
## [13] "y" "yhat" "residual" "CV_Score"
## [17] "Stud_residual" "Intercept_SE" "menage_SE" "Primair_SE"
## [21] "Moyen_SE" "Secondr_SE" "Ltrcy_F_SE" "pct_cm_f_SE"
## [25] "pop_density_SE" "Ltrcy_A_SE" "Ltrcy_W_SE" "Ltrcy_M_SE"
## [29] "fertlty_SE" "Intercept_TV" "menage_TV" "Primair_TV"
## [33] "Moyen_TV" "Secondr_TV" "Ltrcy_F_TV" "pct_cm_f_TV"
## [37] "pop_density_TV" "Ltrcy_A_TV" "Ltrcy_W_TV" "Ltrcy_M_TV"
## [41] "fertlty_TV" "Local_R2" "geometry"
ggplot(gw_model_2013_results, aes(fill = Local_R2)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2013") +
theme_void()

ggplot(gw_model_2002_results, aes(fill = Local_R2)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2002") +
theme_void()

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = Local_R2)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c() +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggplot(gw_model_2013_results, aes(fill = menage)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for household members for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Local β for \nHH")

ggplot(gw_model_2002_results, aes(fill = menage)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for household members for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Local β for \nHH")

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = menage)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c() +
labs(fill = "Local β for \nHH")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggplot(gw_model_2013_results, aes(fill = pop_density)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for population density for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Local β for \npopulation density")

ggplot(gw_model_2002_results, aes(fill = pop_density)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for population density for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Local β for \npopulation density")

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = pop_density)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c() +
labs(fill = "Local β for \npopulation density")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

Classification and
clustering of Guediawaye census data
Geodemographic
classification
set.seed(1994)
dfw_kmeans_2013 <- dfw_pca_2013 %>%
st_drop_geometry() %>%
select(PC1:PC8) %>%
kmeans(centers = 6)
table(dfw_kmeans_2013$cluster)
##
## 1 2 3 4 5 6
## 53 73 139 71 95 4
##
dfw_kmeans_2002 <- dfw_pca_2002 %>%
st_drop_geometry() %>%
select(PC1:PC8) %>%
kmeans(centers = 6)
table(dfw_kmeans_2002$cluster)
##
## 1 2 3 4 5 6
## 71 38 3 3 85 68
dfw_clusters_2013 <- dfw_pca_2013 %>%
mutate(cluster = as.character(dfw_kmeans_2013$cluster))
ggplot(dfw_clusters_2013, aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Map of geodemographic clusters for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Cluster ")

dfw_clusters_2002 <- dfw_pca_2002 %>%
mutate(cluster = as.character(dfw_kmeans_2002$cluster))
ggplot(dfw_clusters_2002, aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Map of geodemographic clusters for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Cluster ")

dfw_clusters <- dfw_clusters_2002 %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(dfw_clusters_2013 %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
dfw_clusters %>%
ggplot(aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

library(plotly)
cluster_plot <- ggplot(dfw_clusters_2013,
aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_brewer(palette = "Set1") +
labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2013") +
theme_minimal()
ggplotly(cluster_plot) %>%
layout(legend = list(orientation = "h", y = -0.15,
x = 0.2, title = "Cluster"))
cluster_plot <- ggplot(dfw_clusters_2002,
aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_brewer(palette = "Set1") +
labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2002") +
theme_minimal()
ggplotly(cluster_plot) %>%
layout(legend = list(orientation = "h", y = -0.15,
x = 0.2, title = "Cluster"))
Spatial clustering
& regionalization
library(spdep)
input_vars <- dfw_pca_2013 %>%
select(PC1:PC8) %>%
st_drop_geometry() %>%
as.data.frame()
skater_nbrs <- poly2nb(dfw_pca_2013, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)
regions <- skater(
mst[,1:2],
input_vars,
ncuts = 7,
crit = 10
)
dfw_clusters_2013$region <- as.character(regions$group)
ggplot(dfw_clusters_2013, aes(fill = region)) +
geom_sf(size = 0.1) +
labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2013") +
scale_fill_brewer(palette = "Set1") +
theme_void()

input_vars <- dfw_pca_2002 %>%
select(PC1:PC8) %>%
st_drop_geometry() %>%
as.data.frame()
skater_nbrs <- poly2nb(dfw_pca_2002, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)
regions <- skater(
mst[,1:2],
input_vars,
ncuts = 7,
crit = 10
)
dfw_clusters_2002$region <- as.character(regions$group)
ggplot(dfw_clusters_2002, aes(fill = region)) +
geom_sf(size = 0.1) +
labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2002") +
scale_fill_brewer(palette = "Set1") +
theme_void()

dfw_clusters <- dfw_clusters_2002 %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(dfw_clusters_2013 %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
dfw_clusters %>%
ggplot(aes(fill = region)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set2") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)
